Part 1

Row

Accidents per year

Accidents by weekday

Row

Accidents by hour

Deaths by hour

Part 2

Row

Deaths by speed zone

Row

Death rate by year of vehicle manufacture

Regression model

Goodness of fit

term estimate std.error statistic p.value
(Intercept) 0.4420226 0.0627854 7.040211 0e+00
vehicle_year_manuf -0.0002169 0.0000314 -6.918470 1e-07
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
0.5846833 0.5724681 0.0019542 47.86523 1e-07 1 174.5064 -343.0127 -338.2622 0.0001298 34 36
---
title: "Motor Vehicle Accidents in Victoria"
output: 
  flexdashboard::flex_dashboard:
        vertical_layout: scroll
        orientation: rows
        source_code: embed
---

```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(lubridate)
library(janitor)
library(plotly)
library(ggResidpanel)
library(broom)
library(knitr)
library(kableExtra)
library(plotly)
```

```{r load-data}
accidents <- read_csv("data/ACCIDENT.csv") %>%
  clean_names()

locations <- read_csv("data/ACCIDENT_LOCATION.csv") %>%
  clean_names()

nodes <- read_csv("data/NODE.csv") %>%
  clean_names()

persons <- read_csv("data/PERSON.csv") %>%
  clean_names()

vehicles <- read_csv("data/VEHICLE.csv") %>%
  clean_names()
```

```{r create-year-hour-day}
accidents <- accidents %>%
  mutate(accidentdate = dmy(accidentdate),
         Year = year(accidentdate),
         Hour = hour(accidenttime),
         Weekday = wday(accidentdate, 
                        label = TRUE,
                        abbr = FALSE))
```

Part 1 {data-icon="fa-battery-quarter"}
===================================== 

Row {data-width=600}
--------------------------------
### Accidents per year

```{r chen1}
accidents_per_year <- accidents %>%
  count(Year) %>% 
  ggplot(aes(x = Year, 
             y = n)) +
  geom_line() +
  xlab("Year(2006 ~ 2020)") +
  ylab("Number of Car Accidents") +
  geom_point() 

ggplotly( accidents_per_year)
```

### Accidents by weekday

```{r chen2}
accidents %>%
  count(Weekday,
        name = "Accidents") %>% 
  ggplot(aes(x = Weekday, 
             y = Accidents)) +
  geom_bar(stat = "identity", 
           fill = "#999999") +
  ylab("Number of Car Accidents") +
  geom_text(aes(label = Accidents), 
            vjust = -1, 
            color = "black", 
            size = 3)
```


Row {data-width=600}
--------------------------------
### Accidents by hour

```{r chen3}
accidents_by_hour <- accidents %>%
  count(Hour,
        name = "Accidents") 

p3 <- accidents_by_hour %>%
  ggplot(aes(x = Hour, 
             y = Accidents)) + 
  geom_line() +
  xlab("Time") +
  ylab("Number of Car Accidents") +
  geom_point()

ggplotly(p3)
```


### Deaths by hour

```{r deaths-by-hour}
deaths_by_hour <- accidents %>%
  group_by(Hour) %>%
  tally(no_persons_killed,
        name = "Deaths")
```

```{r deaths-per-accident-by-hour}
deaths_per_accident_by_hour <- accidents_by_hour %>%
  left_join(deaths_by_hour) %>%
  mutate(Deaths_per_accident = round(Deaths/Accidents, digit = 4))
```

```{r chen4}
p4 <- deaths_per_accident_by_hour %>%
  ggplot(aes(x = Hour, 
             y = Deaths_per_accident)) +
  geom_line() +
  xlab("Time") +
  ylab("Death Rate of Car Accidents") +
  geom_point()  
ggplotly(p4)
```

Part 2 {data-icon="fa-battery-half"}
===================================== 

Row {.tabset data-height=500}
------------

### **Deaths by speed zone**

```{r accidents-by-speed-zone}
accidents_by_speed_zone <- accidents %>%
  count(speed_zone,
        name = "Accidents")
```

```{r deaths-by-speed-zone}
deaths_by_speed_zone <- accidents %>%
  group_by(speed_zone) %>%  
  tally(no_persons_killed,
        name = "Deaths")
```

```{r deaths-per-accident-by-speed-zone}
deaths_by_accident <- accidents_by_speed_zone %>%
  left_join(deaths_by_speed_zone) %>%
  mutate(Deaths_by_accident = Deaths/Accidents)
```

```{r deaths-per-accident-plot}
deaths_by_accident %>%
  mutate(speed_zone = as.numeric(speed_zone)) %>%
  filter(speed_zone %in% seq(30, 110, 10)) %>% 

  ggplot(aes(y = Deaths_by_accident, 
             x = speed_zone)) + 
  geom_line() +
  labs(x = "Speed Zone",
       y = "Deaths by Accident")

ggplotly()
```

Row {.tabset data-height=500}
------------
### **Death rate by year of vehicle manufacture**

```{r join-person-and-vehicle}
person_vehicle <- persons %>%
  left_join(vehicles)
```

```{r total-people-involved-in-accidents-per-manufature-year}
person_vehicle_total <- person_vehicle %>%
  group_by(vehicle_year_manuf) %>%
  tally(name = "Persons",
        sort = TRUE)

person_vehicle_deaths <- person_vehicle %>%
  filter(inj_level_desc == "Fatality") %>%
  group_by(vehicle_year_manuf) %>%
  tally(name = "Fatalities",
        sort = TRUE)
```

```{r join-total-and-deaths}
death_rate_by_year_manuf <- person_vehicle_total %>%
  left_join(person_vehicle_deaths) %>%
  mutate(death_rate = Fatalities/Persons) %>%
  arrange(desc(vehicle_year_manuf))
```

```{r plot-death-rate-by-year-manuf}
manuf_year_death_rate <- death_rate_by_year_manuf %>%
  filter(vehicle_year_manuf >= 1985 & vehicle_year_manuf < 3001)

p1 <- manuf_year_death_rate %>%
  ggplot(aes(x = vehicle_year_manuf, 
             y = death_rate)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Year Manufactured",
       y = "Death Rate")

ggplotly(p1)
```

### **Regression model**

```{r regression-model}
manuf_year_death_rate_lm <- lm(death_rate ~ vehicle_year_manuf, data = manuf_year_death_rate)

resid_panel(manuf_year_death_rate_lm, plot = "all")

```

### **Goodness of fit**

```{r}
tidy(manuf_year_death_rate_lm) %>% 
  kable() %>% 
  kable_styling(bootstrap_options = "striped")

glance(manuf_year_death_rate_lm) %>% 
  kable() %>% 
  kable_styling(bootstrap_options = "striped")
```


Column {.sidebar data-width=350}
----

***

> **Findings**

1. Accidents become increasingly serious the faster the speed at which they occur. The risk of dying in an accident is nearly 13 times higher in a 110km/h zone (0.064 deaths/accident) than in a 40km/h zone (0.005 deaths/accident). 


2. A person's risk of dying in an accident is positively correlated with the age of their vehicle; the newer the make, the less likely it is that a person will be killed in an accident. For every year older a vehicle is, the risk of dying if you have an accident in it increases by 0.0002 deaths per accident. The reason for this is improved safety standards for vehicles. 

***